We need some data to play with

Individual bivalves collected from soft sediment at One Tree Reef with measurements. This is a similified version of the data related to this research:

Julieta C. Martinelli, Matthew A. Kosnik, And Joshua S. Madin Passive Defensive Traits Are Not Good Predictors Of Predation For Infaunal Reef Bivalves PALAIOS, 2016, v. 31, 607–615 . http://dx.doi.org/10.2110/palo.2016.018

Matthew A. Kosnik, Quan Hua, Darrell S. Kaufman, Atun Zawadzki Sediment accumulation, stratigraphic order, and the extent of time-averaging in lagoonal sediments: a comparison of 210Pb and 14C/amino acid racemization chronologies Coral Reefs (2015) 34:215–229. http://dx.doi.org/10.1007/s00338-014-1234-2

otiShells <- read.delim('./data/oti_measurements.txt')

WHAT HAVE WE GOT HERE?

head(otiShells)
##               taxonName siteName  x_mm y_mm z_mm mass_mg
## 1 Pinguitellina robusta      2nd  9.84 8.36 2.34  110.96
## 2 Pinguitellina robusta      2nd  8.02 6.63 1.55   50.57
## 3 Pinguitellina robusta      2nd  6.47 5.15 1.39   28.20
## 4     Exotica clathrata      2nd 11.18 5.87 1.55   39.60
## 5     Exotica clathrata      2nd 12.83 6.62 1.80   74.19
## 6     Exotica clathrata      2nd 10.80 5.90 1.53   45.62
summary(otiShells)      # ANYTHING FISHY?
##                  taxonName    siteName        x_mm            y_mm       
##  Abranda jeanae       :1469   1st:1925   Min.   : 0.00   Min.   : 0.000  
##  Pinguitellina robusta: 710   2nd: 908   1st Qu.: 8.24   1st Qu.: 5.830  
##  Exotica clathrata    : 522   3rd: 671   Median :10.54   Median : 7.330  
##  Scissulina dispar    : 329              Mean   :11.42   Mean   : 8.088  
##  Fragum fragum        : 173              3rd Qu.:13.84   3rd Qu.: 9.800  
##  Ctena bella          : 138              Max.   :62.34   Max.   :36.190  
##  (Other)              : 163                                              
##       z_mm          mass_mg    
##  Min.   : 0.00   0.00   : 202  
##  1st Qu.: 1.38   \\N    :  46  
##  Median : 1.77   11.00  :  18  
##  Mean   : 1.93   14.00  :  17  
##  3rd Qu.: 2.34   10.00  :  12  
##  Max.   :11.85   12.00  :  12  
##                  (Other):3197
otiShells <- read.delim('./data/oti_measurements.txt', na.strings='\\N')
otiShells <- otiShells[(otiShells$mass_mg > 0),]
otiShells <- otiShells[(otiShells$x_mm > 0),]
otiShells[1:10,"x_mm"] 
##  [1]  9.84  8.02  6.47 11.18 12.83 10.80 10.20 14.32 32.95 30.98
otiShells[["x_mm"]][1:10]
##  [1]  9.84  8.02  6.47 11.18 12.83 10.80 10.20 14.32 32.95 30.98
otiShells$x_mm[1:10]
##  [1]  9.84  8.02  6.47 11.18 12.83 10.80 10.20 14.32 32.95 30.98

IF YOU ARE GOING TO USE IT A BUNCH - MAKE A NEW COLUMN FOR THE TRANSFORMED VARIABLE

otiShells$crMass <- otiShells$mass_mg^(1/3)
? hist()
hist(otiShells$x_mm)

hist(otiShells$x_mm, xlab='Shell size (mm)')

hist(otiShells$x_mm, xlab='Shell size (mm)', col='seagreen', main="Figure 1")

hist(log(otiShells$mass_mg), xlab="log(Shell mass) (mg)", col=rgb(0,0.1,0.5), main="Figure 1")

QUESTION: which log is this?

WHAT ELSE CAN hist() DO?

aHist <- hist(log2(otiShells[grep('Abranda',otiShells$taxonName),'mass_mg']), plot=FALSE)

QUESTION: tell me about object aHist

aHist
## $breaks
##  [1]  0  1  2  3  4  5  6  7  8  9 10
## 
## $counts
##  [1]   2  12  59 143 212 301 293 290 102   9
## 
## $density
##  [1] 0.001405481 0.008432888 0.041461701 0.100491918 0.148981026
##  [6] 0.211524947 0.205903022 0.203794800 0.071679550 0.006324666
## 
## $mids
##  [1] 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5 8.5 9.5
## 
## $xname
## [1] "log2(otiShells[grep(\"Abranda\", otiShells$taxonName), \"mass_mg\"])"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
  • MAKE A HISTOGRAM FOR LOG ABRANDA SHELL MASS… WITH NO AXES OR LABELS
plot(aHist, col='seagreen', ann=FALSE, axes=FALSE)

  • ADD Y axis() MANUALLY (ALLOWS FOR GREATER CONTROL)
? axis()
axis(side= 2)   # identical to what would have been drawn from hist()

  • SPECIFY A COMPLEX AXIS
where <- seq(floor(min(aHist$breaks)),ceiling(max(aHist$breaks)),by=2)
what <- 2^where
axis(1, at=where, labels=what, lwd=2, lty=3, col='blue', font=3)    # specify details of line 

  • ANOTHER WAY TO ADD MARGIN TEXT (AKA - LABEL)
? mtext()
mtext("Abranda", side=3, line=-2, font=4, adj=0.1, cex=2)   # big, italic font right side

USE mtext() to add title to x-axis

mtext("Shell mass (mg)", side=1, line=2)

WHAT OTHER OPTIONS DOES HISTOGRAM HAVE?

? hist()
  • SET breaks
aHist <- hist(otiShells[grep('Abranda',otiShells$taxonName),'x_mm'], plot=FALSE, breaks=0:28)

aHist
plot(aHist, col='seagreen', ann=FALSE, axes=TRUE, las=1)

## $breaks
##  [1]  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22
## [24] 23 24 25 26 27 28
## 
## $counts
##  [1]   0   0   0   1   5  24  39  69  90 107 119 104 117 110  91 106 110
## [18] 101  74  51  41  35  19   6   2   1   1   0
## 
## $density
##  [1] 0.0000000000 0.0000000000 0.0000000000 0.0007027407 0.0035137034
##  [6] 0.0168657765 0.0274068869 0.0484891075 0.0632466620 0.0751932537
## [11] 0.0836261420 0.0730850316 0.0822206606 0.0773014758 0.0639494027
## [16] 0.0744905130 0.0773014758 0.0709768096 0.0520028110 0.0358397751
## [21] 0.0288123682 0.0245959241 0.0133520731 0.0042164441 0.0014054814
## [26] 0.0007027407 0.0007027407 0.0000000000
## 
## $mids
##  [1]  0.5  1.5  2.5  3.5  4.5  5.5  6.5  7.5  8.5  9.5 10.5 11.5 12.5 13.5
## [15] 14.5 15.5 16.5 17.5 18.5 19.5 20.5 21.5 22.5 23.5 24.5 25.5 26.5 27.5
## 
## $xname
## [1] "otiShells[grep(\"Abranda\", otiShells$taxonName), \"x_mm\"]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"

use mtext() to add labels & text…

mtext("Abranda", side=3, line=-2, font=4, adj=0.1, cex=2)   # big, italic font right side
mtext("Shell length (mm)", side=1, line=2)
mtext("Frequency", side=2, line=2.5)

  • BIVARIATE PLOTS
? plot()

subset the data to get only Pinguitellina

pingData <- otiShells[grep('Pingui',otiShells$taxonName),]

SPECIFY THE COLUMN WITH THE X DATA AND THE COLUMN WITH THE Y DATA

plot() x_mm on the x-axis vs mass_mg on the y-axis

plot(pingData$x_mm, pingData$mass_mg)

ALTERNATIVELY, USE FORMULA NOTATION (SAME PLOT, BUT BETTER DEFAULT AXIS LABELS)

plot(mass_mg ~ x_mm, pingData)

PLOT IT WITH A CUBE ROOT TRANSFORM ON THE Y AXIS?

plot((mass_mg)^(1/3) ~ x_mm, pingData) 

LETS MAKE NICER AXIS LABELS USING xlab, ylab & col

plot((mass_mg)^(1/3) ~ x_mm, pingData, xlab="Max length (mm)", ylab="cuberoot of mass (mg)", col='darkblue')

LETS MAKE IT PLOT LINES INSTEAD (what other options are there?)

plot((mass_mg)^(1/3) ~ x_mm, pingData, xlab="Max length, mm", ylab="cuberoot of mass (mg)", col='darkblue', type='l')

LETS MAKE IT PLOT THE AXES, BUT NOT ANY POINTS!

plot(crMass ~ x_mm, otiShells, xlab="Max length (mm)", ylab="cuberoot of mass (mg)", col='darkblue', type='n')

?? WHY WOULD WE WANT TO DO THAT??

? points()

LETS PLOT THE “Pinguitellina” as red circles

points(crMass ~ x_mm, data= otiShells[grep('Pingui',otiShells$taxonName),], col="red", pch=21) 

LETS PLOT THE “Abranda” as green squares

points(crMass ~ x_mm, data= otiShells[grep('Abranda',otiShells$taxonName),], col="green", pch=21) 

WHAT ELSE CAN WE SPECIFY FOR POINTS?

PLOT IS SUPER GENERAL (IT WILL TRY TO PLOT ANYTHING). FACTORS DEFAULT TO BOXPLOTS…

plot(crMass ~ siteName, otiShells)

  • BOX PLOTS
? boxplot()
boxplot(pingData$x_mm, col='lightblue')

  • SPLIT IT BY “siteName”?
boxplot(x_mm ~ siteName, pingData, col='skyblue') 

BETTER AXIS LABELS

boxplot(x_mm ~ siteName, pingData, xlab="One Tree Reef Lagoon", ylab="Max shell length, mm") 

ADD notch is?

boxplot(x_mm ~ siteName, pingData, xlab="One Tree Reef Lagoon", ylab="Max shell length, mm", notch = TRUE)

?? WHAT ARE THE notch is??

  • OVERLAY THE POINTS? ( IT WORKS - IF WE MAKE Root.herbivore A FACTOR )
boxplot(x_mm ~ siteName, pingData, xlab="One Tree Reef Lagoon", ylab="Max shell length, mm", notch = TRUE, col='lightgrey')
points(pingData$siteName, pingData$x_mm, cex=0.5)

  • EXPLORATORY PLOTTING
? pairs()

A VERY COOL FUCNTION, BUT ONLY WORKS WITH NUMERIC DATA Great way to quickly access collinearity among variables

pairs(pingData)

WHICH COLUMNS HAVE NUMERIC DATA??

names(pingData) 
## [1] "taxonName" "siteName"  "x_mm"      "y_mm"      "z_mm"      "mass_mg"  
## [7] "crMass"
comps <- names(pingData)[3:7]
comps
## [1] "x_mm"    "y_mm"    "z_mm"    "mass_mg" "crMass"

PLOT ALL PAIR-WISE COMPARISONS FOR NUMERIC Pinguitellina DATA

pairs(pingData[comps])

and an explanation for why there are two relations instead of 1!

3D PLOTTING

LETS USE THE BUILT IN volcano DATASET FOR THIS…

?volcano

TO GOOD BUILT IN FUNCTIONS ARE image() AND contour()

image(volcano)

contour(volcano)

SPECIFY THE COLOUR GRADIENT

image(volcano, col=terrain.colors(50))

“ADD” CONTOURS OVER TOP OF image()

contour(volcano, add=TRUE)

It is possible to read shape files, and do basic GIS type things in R.

I PERSONALLY REALLY DISLIKE BARPLOTS, BUT THEY ARE COMMONLY USED R DOES NOT HAVE A STANDARD ERROR FUNCTION, BUT WE CAN WRITE ONE…

standard.error <- function(x) { sd(x)/sqrt(length(x)) }

REMEMBER ALL THE WAY BACK TO THE MORNING… USE aggregate TO GET THE mean otiShells['x_mm'] BY otiShells["taxonName"] + TIP: USING otiShells["x_mm"] INSTEAD OF otiShells$x_mm GIVES YOU NICER COLUMN NAMES

mn <- aggregate(otiShells['x_mm'], otiShells["taxonName"], mean) 
bp <- barplot(mn$x_mm)

NOTE: BY ASSIGNING barplot() TO bp… WE CAN DO THE NEXT STEP

se <- aggregate(otiShells['x_mm'], otiShells["taxonName"], standard.error)

ADD THE STANDARD ERROR USING arrows

arrows(bp, mn$x_mm + se$x_mm, bp, mn$x_mm - se$x_mm, code=3, angle=90, length=0.1, lwd=2)

?? DOESN’T QUITE LOOK RIGHT

MAKE A BAR PLOT AND SPECIFY Y-AXIS ylim SO THAT THE WHOLE SE FITS IN…

bp <- barplot(mn$x_mm, ylim=c(0, max(mn$x_mm + se$x_mm, na.rm=TRUE))) 
arrows(bp, mn$x_mm + se$x_mm, bp, mn$x_mm - se$x_mm, code=3, angle=90, length=0.1, lwd=2)

ADD LABELS USING axis()

axis(1, at=bp, labels=mn$taxonName, font=3) 

STILL NOT WHAT WE WANT BECAUSE R DOES SKIPS LABELS THAT WOULD OVER WRITE try again but rotating the axis labels 90 degrees

bp <- barplot(mn$x_mm, ylim=c(0, max(mn$x_mm + se$x_mm, na.rm=TRUE))) 
arrows(bp, mn$x_mm + se$x_mm, bp, mn$x_mm - se$x_mm, code=3, angle=90, length=0.1, lwd=2)
axis(1, at=bp, labels= mn$taxonName, las=2, font=3)
title("Getting it just right can be a pain...")

HOW DO WE FIX THE MARGIN SIZE… SHOW ME HOW TO DO THAT CRAZY VOODOO THAT YOUDOO? REMEMBER IF YOU DIG DEEP ENOUGH YOU CAN TINKER WITH ANYTHING IN R Afraid? “You will be. You… will… be.” (Yoda)

GLOBAL GRAPHIC PARAMETERS

?par

SAVE CURRENT / ORIGINAL PARAMETERS

oldPar <- par()

SO HOW CAN WE FIX OUR MARGIN ISSUE? use par() to set margin. hint: try 12 for margin 1

par(mar=c(12, 4, 4, 2)) 
bp <- barplot(mn$x_mm, ylim=c(0, max(mn$x_mm + se$x_mm, na.rm=TRUE))) 
arrows(bp, mn$x_mm + se$x_mm, bp, mn$x_mm - se$x_mm, code=3, angle=90, length=0.1, lwd=2)
axis(1, at=bp, labels= mn$taxonName, las=2, font=3, cex=0.7)
mtext("Species", side=1, line=10) 
mtext("Shell length (mm)", side=2, line=3) 
title("... but it can be worth it?")

RESTORE ORIGINAL PARAMETERS

par(oldPar)

SAVING PLOTS

dir.create("./figs")

if(!("./figs") %in% list.dirs(".")) dir.create("./figs")

START A PDF FILE TO WRITE TO

pdf("./figs/my_barplot.pdf", width=5, height=5) 

WHAT ARE THE UNITS OF WIDTH AND HEIGHT

par(mar=c(12, 4, 4, 2)) 
bp <- barplot(mn$x_mm, ylim=c(0, max(mn$x_mm + se$x_mm, na.rm=TRUE)), las=1) 
arrows(bp, mn$x_mm + se$x_mm, bp, mn$x_mm - se$x_mm, code=3, angle=90, length=0.04, lwd=2)
axis(1, at=bp, labels= mn$taxonName, las=2, font=3, cex=0.7)
mtext("Species", side=1, line=10) 
mtext("Shell length (mm)", side=2, line=2.5) 
title("One Tree Reef Bivalvia") 
dev.off()
## quartz_off_screen 
##                 2

MUST ALWAYS USE dev.off() WHEN YOU ARE DONE WRITING TO THE FILE… OR IT STAYS OPEN…

START A PNG FILE

png(filename="./figs/my_barplot.png", width=400, height=400) 
par(mar=c(12, 4, 4, 2)) 
bp <- barplot(mn$x_mm, ylim=c(0, max(mn$x_mm + se$x_mm, na.rm=TRUE)), las=1) 
arrows(bp, mn$x_mm + se$x_mm, bp, mn$x_mm - se$x_mm, code=3, angle=90, length=0.04, lwd=2)
axis(1, at=bp, labels= mn$taxonName, las=2, font=3, cex=0.7)
mtext("Species", side=1, line=10) 
mtext("Shell length (mm)", side=2, line=2.5) 
title("One Tree Reef Bivalvia") 
dev.off()
## quartz_off_screen 
##                 2

MULTIPLE PANEL PLOTS

"mfrow" FILLS YOUR PANELS ACROSS THE TOP ROW AND THEN SEQUENTIAL ROWS DOWN "mfcol" FILLS YOUR PANELS DOWN THE LEFT MOST COLUMN FIRST… `

oldPar <- par()

MAKE 4 HISTOGRAMS

par(mfcol=c(2,2)) 
plot(aHist, col='blue', ann=FALSE, main='Abranda') 
pHist <- hist(pingData$x_mm, col='green', main='Pinguitellina') 
hist(pingData$y_mm, col='red') 
plot(aHist,col='pink', ann=FALSE) 

par(oldPar)

LETS MAKE IT NICER… + SET MARGINS ALL TO 1 + SET THE OUTER MARGINS TO c(4,3,4,2)

par(mfrow=c(2, 2), mar=c(1, 1, 1, 1), oma=c(4, 3, 4, 2))

LETS ONLY PUT THE OUTER AXES ON (COMMON IF THE AXES ARE THE SAME) LETS LABEL EACH PANEL A-D IN THE UPPER LEFT CORNER

plot(aHist, ann=FALSE, axes=FALSE, col='forestgreen', xlim=range(pHist$breaks, aHist$breaks)) 
axis(2) 
mtext("A", 3, -2, adj = 0.1, font=2, cex=1.2) 
hist(pingData$x_mm, ann=FALSE, axes=FALSE, col='skyblue') 
mtext("B", 3, -2, adj = 0, font=2, cex=1.2) 
plot(pHist, ann=FALSE, col='pink', xlim=range(pHist$breaks, aHist$breaks)) 
mtext("C", 3, -2, adj = 0.1, font=2, cex=1.2) 
hist(pingData$x_mm, ann=FALSE, axes=FALSE, col='grey40') 
axis(1) 
mtext("D", 3, -2, adj = 0, font=2, cex=1.2)

JUST TO DEMONSTRATE THE POSSIBLE BOXES…

? box()
box("plot", col="red") 
box("figure", col="blue") 
box("inner", col="black") 
box("outer", col="pink")

mtext REALLY SHINES - PUTTING JOINT AXIS LABELS outer

mtext("Shell size (mm)", side=1, outer=TRUE, line=2) 
mtext("Frequency", side=2, outer=TRUE, line=2) 
mtext("Four plots of shell size", side=3, outer=TRUE, line=1, cex=1.5) 

par(oldPar)

ADDING ADDITIONAL THINGS TO PLOTS

plot(crMass ~ x_mm, pingData, type="p", axes=FALSE, ann=FALSE, pch=21, col="white", bg="black") 
axis(1) 
axis(2, las=2) 
mtext("Shell size (mm)", side = 1, line = 3) 
mtext("Cuberoot shell mass (mg)", side = 2, line = 3) 
title("Figure 1", adj=0)

GET/PLOT A BEST FIT LINE USING lm()

mod <- lm(crMass ~ x_mm, pingData)

PLOT THE LINE USING abline

abline(mod, lwd=2, lty=2, col='blue')

GET/PLOT THE PREDICTION INTERVALS

hs <- seq(min(pingData$x_mm), max(pingData$x_mm), 1) 
intPred <- predict(mod, list(x_mm = hs), interval = "prediction") 
lines(hs, intPred[,"lwr"], lty = 2) 
lines(hs, intPred[,"upr"], lty = 2) 

GET/PLOT THE CONFIDENCE INTERVALS

intConf <- predict(mod, list(x_mm = hs), interval = "confidence") 
lines(hs, intConf[,"lwr"]) 
lines(hs, intConf[,"upr"]) 

PLOT A SHADED THE PREDICTION INTERVAL REGION

polygon(c(hs, rev(hs)), c(intPred[,"lwr"], rev(intPred[,"upr"])), col = rgb(0,0,0,0.2), border = NA)

LETS ADD THE R-SQUARED TO THE PLOT + TIP: UNICODE CHARACTERS ARE AN EASY WAY TO GET SOME CHARACTERS

text(5,4,paste("r\U00b2 = ",round(summary(mod)$r.squared,2)))

ADD A LEGEND

legend("topleft", c("green data", "actual data", "orange cross"), pch=c(4, 20, 3), col=c("green", "black", "orange"), bty="y")

ADD SOME MATH

text(11, 2, expression(Y[i] == beta[0]+beta[1]*X[i1]+epsilon[i])) # see demo(plotmath) for more examples 

see demo(plotmath) for more examples NOW… WRAP THE SUPER NICE PLOTS INTO A PDF…